package com.hao.service.impl;

import com.baomidou.mybatisplus.service.impl.ServiceImpl;
import com.hao.crawler.Crawler;
import com.hao.crawler.DomCrawler;
import com.hao.entity.CrawlerDom;
import com.hao.entity.CrawlerUrl;
import com.hao.mapper.CrawlerDomMapper;
import com.hao.mapper.CrawlerUrlMapper;
import com.hao.service.CrawlerDomService;
import com.hao.util.ApplicationProperties;
import com.hao.util.FileUtils;
import com.hao.util.HttpClientUtil;
import com.hao.util.StringUtils;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;

import java.io.File;
import java.io.FileOutputStream;
import java.util.Date;

/**
 * <p>
 * 页面元素 服务实现类
 * </p>
 *
 * @author R.hao
 * @since 2017-07-16
 */
@Service
@Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
public class CrawlerDomServiceImpl extends ServiceImpl<CrawlerDomMapper, CrawlerDom> implements CrawlerDomService {

    private static final Logger logger = LoggerFactory.getLogger(CrawlerDomServiceImpl.class);

    @Autowired
    private CrawlerDomMapper domMapper;

    @Autowired
    private CrawlerUrlMapper urlMapper;

    /**
     * Runs the crawler for a single URL.
     * <p>
     * Status conventions (persisted via the mappers):
     * dom status 3 = download failed, dom status 1 = download succeeded;
     * url status 1 = page fully processed (including the "no usable image" case).
     *
     * @param topLog     log prefix inherited from the caller; the url id is appended
     * @param crawlerUrl url record to crawl; silently ignored when {@code null}
     */
    @Override
    public void runCrawler(String topLog, CrawlerUrl crawlerUrl) {
        if (null == crawlerUrl) {
            return;
        }
        String myTopLog = topLog + crawlerUrl.getId() + "-";
        try {
            Crawler crawler = new DomCrawler();
            Elements images = crawler.run(crawlerUrl.getUrl());
            Element image = images.first();
            // Page has no image element at all: nothing to persist, just mark the url done.
            if (null == image) {
                markUrlProcessed(crawlerUrl);
                return;
            }
            String absUrl = image.absUrl("src");
            // Image element present but src does not resolve to an absolute url.
            if (StringUtils.isEmpty(absUrl)) {
                markUrlProcessed(crawlerUrl);
                return;
            }

            String fileName = FileUtils.fileName(absUrl);
            CrawlerDom crawlerDom = new CrawlerDom();
            crawlerDom.setContent(absUrl);
            crawlerDom.setTitle(fileName);
            crawlerDom.setUrlId(crawlerUrl.getId());
            crawlerDom.setCreateTime(new Date());
            crawlerDom.setType("0");
            this.domMapper.insert(crawlerDom);

            // downloadImage sets the dom status (1 = ok, 3 = failed) as a side effect,
            // so the record is persisted again afterwards.
            downloadImage(crawlerUrl, crawlerDom);
            this.domMapper.updateById(crawlerDom);

            markUrlProcessed(crawlerUrl);
        } catch (Exception e) {
            // FIX: was logger.info with string concatenation — a failed crawl is an
            // error condition and SLF4J placeholders avoid eager concatenation.
            logger.error("{}{}", myTopLog, crawlerUrl.getUrl(), e);
        }
    }

    /** Marks a url record as fully processed (status 1) and persists the change. */
    private void markUrlProcessed(CrawlerUrl crawlerUrl) {
        crawlerUrl.setStatus(1);
        this.urlMapper.updateById(crawlerUrl);
    }

    /**
     * Downloads the image referenced by {@code dom} into
     * {@code FILE_SAVE_PATH/enterName/title/domTitle}, creating the parent
     * folder on demand and removing it again if the download left it empty.
     * On failure the dom status is set to 3.
     *
     * @param url source url record (supplies the folder names)
     * @param dom dom record (supplies the image url and file name; status updated in place)
     */
    private void downloadImage(CrawlerUrl url, CrawlerDom dom) {
        File file = new File(ApplicationProperties.FILE_SAVE_PATH +
                File.separator + url.getEnterName() + File.separator + url.getTitle() + File.separator + dom.getTitle());
        File parent = file.getParentFile();

        // FIX: typo "create faild" and string concatenation in the log call.
        if (!parent.exists() && !parent.mkdirs()) {
            logger.error("folder-{} create failed", file.getParent());
            return;
        }
        try {
            downloadImage(dom, file);
            // FIX: the original ignored delete()'s return value; surface the failure.
            if (FileUtils.isEmptyFolder(parent) && !parent.delete()) {
                logger.warn("empty folder-{} delete failed", parent);
            }
        } catch (Exception e) {
            // FIX: typo "feild" -> "failed".
            logger.error("download image failed", e);
            dom.setStatus(3);
        }
    }

    /**
     * Fetches the image bytes over HTTP (browser-mimicking request) and writes
     * them to {@code file}. Sets dom status 1 on HTTP 200, 3 otherwise.
     *
     * @param dom  dom record whose content field holds the image url
     * @param file destination file (parent folder must already exist)
     * @throws Exception on any network or I/O failure; caller handles cleanup
     */
    private static void downloadImage(CrawlerDom dom, File file) throws Exception {
        HttpClient httpClient = HttpClientUtil.createHttpClient(20, 20, 120000, 30000, 3000);
        HttpGet listHttpGet = getImageHttp(dom.getContent());
        HttpResponse listResponse = httpClient.execute(listHttpGet);
        if (listResponse.getStatusLine().getStatusCode() == 200) {
            logger.info("download image success");
            // FIX: try-with-resources — the original leaked the stream when write() threw.
            try (FileOutputStream output = new FileOutputStream(file)) {
                output.write(HttpClientUtil.read(listResponse.getEntity().getContent()));
            }
            dom.setStatus(1);
        } else {
            // FIX: typo "feild" -> "failed".
            logger.info("download image failed");
            dom.setStatus(3);
        }
    }

    /** Builds a GET request that mimics a desktop browser so image hosts serve the file. */
    private static HttpGet getImageHttp(String url) {
        HttpGet listHttpGet = new HttpGet(url);
        listHttpGet.setHeader("Connection", "keep-alive");
        listHttpGet.setHeader("Content-Type", "image/jpeg");
        listHttpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36");
        return listHttpGet;
    }
}
